1   /*
2    * Copyright (C) 2008 The Guava Authors
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    * http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  package com.google.common.base;
18  
19  import static com.google.common.base.Preconditions.checkArgument;
20  import static com.google.common.base.Preconditions.checkNotNull;
21  
22  import com.google.common.annotations.Beta;
23  import com.google.common.annotations.GwtCompatible;
24  
25  import java.util.Arrays;
26  
27  import javax.annotation.CheckReturnValue;
28  
29  /**
30   * Determines a true or false value for any Java {@code char} value, just as {@link Predicate} does
31   * for any {@link Object}. Also offers basic text processing methods based on this function.
32   * Implementations are strongly encouraged to be side-effect-free and immutable.
33   *
34   * <p>Throughout the documentation of this class, the phrase "matching character" is used to mean
35   * "any character {@code c} for which {@code this.matches(c)} returns {@code true}".
36   *
37   * <p><b>Note:</b> This class deals only with {@code char} values; it does not understand
38   * supplementary Unicode code points in the range {@code 0x10000} to {@code 0x10FFFF}. Such logical
39   * characters are encoded into a {@code String} using surrogate pairs, and a {@code CharMatcher}
40   * treats these just as two separate characters.
41   *
42   * <p>Example usages: <pre>
43   *   String trimmed = {@link #WHITESPACE WHITESPACE}.{@link #trimFrom trimFrom}(userInput);
44   *   if ({@link #ASCII ASCII}.{@link #matchesAllOf matchesAllOf}(s)) { ... }</pre>
45   *
46   * <p>See the Guava User Guide article on <a href=
47   * "http://code.google.com/p/guava-libraries/wiki/StringsExplained#CharMatcher">
48   * {@code CharMatcher}</a>.
49   *
50   * @author Kevin Bourrillion
51   * @since 1.0
52   */
53  @Beta // Possibly change from chars to code points; decide constants vs. methods
54  @GwtCompatible(emulated = true)
55  public abstract class CharMatcher implements Predicate<Character> {
56  
57    // Constants
58    /**
59     * Determines whether a character is a breaking whitespace (that is, a whitespace which can be
60     * interpreted as a break between words for formatting purposes). See {@link #WHITESPACE} for a
61     * discussion of that term.
62     *
63     * @since 2.0
64     */
65    public static final CharMatcher BREAKING_WHITESPACE = new CharMatcher() {
66      @Override
67      public boolean matches(char c) {
68        switch (c) {
69          case '\t':
70          case '\n':
71          case '\013':
72          case '\f':
73          case '\r':
74          case ' ':
75          case '\u0085':
76          case '\u1680':
77          case '\u2028':
78          case '\u2029':
79          case '\u205f':
80          case '\u3000':
81            return true;
82          case '\u2007':
83            return false;
84          default:
85            return c >= '\u2000' && c <= '\u200a';
86        }
87      }
88  
89      @Override
90      public String toString() {
91        return "CharMatcher.BREAKING_WHITESPACE";
92      }
93    };
94  
95    /**
96     * Determines whether a character is ASCII, meaning that its code point is less than 128.
97     */
98    public static final CharMatcher ASCII = inRange('\0', '\u007f', "CharMatcher.ASCII");
99  
100   private static class RangesMatcher extends CharMatcher {
101     private final char[] rangeStarts;
102     private final char[] rangeEnds;
103 
104     RangesMatcher(String description, char[] rangeStarts, char[] rangeEnds) {
105       super(description);
106       this.rangeStarts = rangeStarts;
107       this.rangeEnds = rangeEnds;
108       checkArgument(rangeStarts.length == rangeEnds.length);
109       for (int i = 0; i < rangeStarts.length; i++) {
110         checkArgument(rangeStarts[i] <= rangeEnds[i]);
111         if (i + 1 < rangeStarts.length) {
112           checkArgument(rangeEnds[i] < rangeStarts[i + 1]);
113         }
114       }
115     }
116 
117     @Override
118     public boolean matches(char c) {
119       int index = Arrays.binarySearch(rangeStarts, c);
120       if (index >= 0) {
121         return true;
122       } else {
123         index = ~index - 1;
124         return index >= 0 && c <= rangeEnds[index];
125       }
126     }
127   }
128 
  // The first character of each digit range matched by DIGIT, one char per range. NINES is derived
  // from this string by adding 9 to every char, and the two strings become the range starts and
  // range ends of a RangesMatcher, which binary-searches the starts.
  // Must be in ascending order.
  private static final String ZEROES = "0\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6"
      + "\u0c66\u0ce6\u0d66\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946\u19d0\u1b50\u1bb0"
      + "\u1c40\u1c50\ua620\ua8d0\ua900\uaa50\uff10";
133 
134   private static final String NINES;
135   static {
136     StringBuilder builder = new StringBuilder(ZEROES.length());
137     for (int i = 0; i < ZEROES.length(); i++) {
138       builder.append((char) (ZEROES.charAt(i) + 9));
139     }
140     NINES = builder.toString();
141   }
142 
143   /**
144    * Determines whether a character is a digit according to
145    * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>.
146    * If you only care to match ASCII digits, you can use {@code inRange('0', '9')}.
147    */
148   public static final CharMatcher DIGIT = new RangesMatcher(
149       "CharMatcher.DIGIT", ZEROES.toCharArray(), NINES.toCharArray());
150 
151   /**
152    * Determines whether a character is a digit according to {@linkplain Character#isDigit(char)
153    * Java's definition}. If you only care to match ASCII digits, you can use {@code
154    * inRange('0', '9')}.
155    */
156   public static final CharMatcher JAVA_DIGIT = new CharMatcher("CharMatcher.JAVA_DIGIT") {
157     @Override public boolean matches(char c) {
158       return Character.isDigit(c);
159     }
160   };
161 
162   /**
163    * Determines whether a character is a letter according to {@linkplain Character#isLetter(char)
164    * Java's definition}. If you only care to match letters of the Latin alphabet, you can use {@code
165    * inRange('a', 'z').or(inRange('A', 'Z'))}.
166    */
167   public static final CharMatcher JAVA_LETTER = new CharMatcher("CharMatcher.JAVA_LETTER") {
168     @Override public boolean matches(char c) {
169       return Character.isLetter(c);
170     }
171   };
172 
173   /**
174    * Determines whether a character is a letter or digit according to {@linkplain
175    * Character#isLetterOrDigit(char) Java's definition}.
176    */
177   public static final CharMatcher JAVA_LETTER_OR_DIGIT =
178       new CharMatcher("CharMatcher.JAVA_LETTER_OR_DIGIT") {
179     @Override public boolean matches(char c) {
180       return Character.isLetterOrDigit(c);
181     }
182   };
183 
184   /**
185    * Determines whether a character is upper case according to {@linkplain
186    * Character#isUpperCase(char) Java's definition}.
187    */
188   public static final CharMatcher JAVA_UPPER_CASE =
189       new CharMatcher("CharMatcher.JAVA_UPPER_CASE") {
190     @Override public boolean matches(char c) {
191       return Character.isUpperCase(c);
192     }
193   };
194 
195   /**
196    * Determines whether a character is lower case according to {@linkplain
197    * Character#isLowerCase(char) Java's definition}.
198    */
199   public static final CharMatcher JAVA_LOWER_CASE =
200       new CharMatcher("CharMatcher.JAVA_LOWER_CASE") {
201     @Override public boolean matches(char c) {
202       return Character.isLowerCase(c);
203     }
204   };
205 
206   /**
207    * Determines whether a character is an ISO control character as specified by {@link
208    * Character#isISOControl(char)}.
209    */
210   public static final CharMatcher JAVA_ISO_CONTROL =
211       inRange('\u0000', '\u001f')
212       .or(inRange('\u007f', '\u009f'))
213       .withToString("CharMatcher.JAVA_ISO_CONTROL");
214 
  /**
   * Determines whether a character is invisible; that is, if its Unicode category is any of
   * SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and
   * PRIVATE_USE according to ICU4J.
   */
  // The first string holds the inclusive start of each range and the second string the inclusive
  // end of the range at the same position. RangesMatcher rejects the tables at construction time
  // unless they have equal length and the ranges are ascending and non-overlapping.
  public static final CharMatcher INVISIBLE = new RangesMatcher("CharMatcher.INVISIBLE", (
      "\u0000\u007f\u00ad\u0600\u061c\u06dd\u070f\u1680\u180e\u2000\u2028\u205f\u2066\u2067\u2068"
      + "\u2069\u206a\u3000\ud800\ufeff\ufff9\ufffa").toCharArray(), (
      "\u0020\u00a0\u00ad\u0604\u061c\u06dd\u070f\u1680\u180e\u200f\u202f\u2064\u2066\u2067\u2068"
      + "\u2069\u206f\u3000\uf8ff\ufeff\ufff9\ufffb").toCharArray());
225 
226   private static String showCharacter(char c) {
227     String hex = "0123456789ABCDEF";
228     char[] tmp = {'\\', 'u', '\0', '\0', '\0', '\0'};
229     for (int i = 0; i < 4; i++) {
230       tmp[5 - i] = hex.charAt(c & 0xF);
231       c >>= 4;
232     }
233     return String.copyValueOf(tmp);
234 
235   }
236 
  /**
   * Determines whether a character is single-width (not double-width). When in doubt, this matcher
   * errs on the side of returning {@code false} (that is, it tends to assume a character is
   * double-width).
   *
   * <p><b>Note:</b> as the reference file evolves, we will modify this constant to keep it up to
   * date.
   */
  // The first string holds the inclusive start of each single-width range and the second string
  // the inclusive end of the range at the same position; any character outside every range is not
  // matched.
  public static final CharMatcher SINGLE_WIDTH = new RangesMatcher("CharMatcher.SINGLE_WIDTH",
      "\u0000\u05be\u05d0\u05f3\u0600\u0750\u0e00\u1e00\u2100\ufb50\ufe70\uff61".toCharArray(),
      "\u04f9\u05be\u05ea\u05f4\u06ff\u077f\u0e7f\u20af\u213a\ufdff\ufeff\uffdc".toCharArray());
248 
249   /** Matches any character. */
250   public static final CharMatcher ANY =
251       new FastMatcher("CharMatcher.ANY") {
252         @Override public boolean matches(char c) {
253           return true;
254         }
255 
256         @Override public int indexIn(CharSequence sequence) {
257           return (sequence.length() == 0) ? -1 : 0;
258         }
259 
260         @Override public int indexIn(CharSequence sequence, int start) {
261           int length = sequence.length();
262           Preconditions.checkPositionIndex(start, length);
263           return (start == length) ? -1 : start;
264         }
265 
266         @Override public int lastIndexIn(CharSequence sequence) {
267           return sequence.length() - 1;
268         }
269 
270         @Override public boolean matchesAllOf(CharSequence sequence) {
271           checkNotNull(sequence);
272           return true;
273         }
274 
275         @Override public boolean matchesNoneOf(CharSequence sequence) {
276           return sequence.length() == 0;
277         }
278 
279         @Override public String removeFrom(CharSequence sequence) {
280           checkNotNull(sequence);
281           return "";
282         }
283 
284         @Override public String replaceFrom(CharSequence sequence, char replacement) {
285           char[] array = new char[sequence.length()];
286           Arrays.fill(array, replacement);
287           return new String(array);
288         }
289 
290         @Override public String replaceFrom(CharSequence sequence, CharSequence replacement) {
291           StringBuilder retval = new StringBuilder(sequence.length() * replacement.length());
292           for (int i = 0; i < sequence.length(); i++) {
293             retval.append(replacement);
294           }
295           return retval.toString();
296         }
297 
298         @Override public String collapseFrom(CharSequence sequence, char replacement) {
299           return (sequence.length() == 0) ? "" : String.valueOf(replacement);
300         }
301 
302         @Override public String trimFrom(CharSequence sequence) {
303           checkNotNull(sequence);
304           return "";
305         }
306 
307         @Override public int countIn(CharSequence sequence) {
308           return sequence.length();
309         }
310 
311         @Override public CharMatcher and(CharMatcher other) {
312           return checkNotNull(other);
313         }
314 
315         @Override public CharMatcher or(CharMatcher other) {
316           checkNotNull(other);
317           return this;
318         }
319 
320         @Override public CharMatcher negate() {
321           return NONE;
322         }
323       };
324 
325   /** Matches no characters. */
326   public static final CharMatcher NONE =
327       new FastMatcher("CharMatcher.NONE") {
328         @Override public boolean matches(char c) {
329           return false;
330         }
331 
332         @Override public int indexIn(CharSequence sequence) {
333           checkNotNull(sequence);
334           return -1;
335         }
336 
337         @Override public int indexIn(CharSequence sequence, int start) {
338           int length = sequence.length();
339           Preconditions.checkPositionIndex(start, length);
340           return -1;
341         }
342 
343         @Override public int lastIndexIn(CharSequence sequence) {
344           checkNotNull(sequence);
345           return -1;
346         }
347 
348         @Override public boolean matchesAllOf(CharSequence sequence) {
349           return sequence.length() == 0;
350         }
351 
352         @Override public boolean matchesNoneOf(CharSequence sequence) {
353           checkNotNull(sequence);
354           return true;
355         }
356 
357         @Override public String removeFrom(CharSequence sequence) {
358           return sequence.toString();
359         }
360 
361         @Override public String replaceFrom(CharSequence sequence, char replacement) {
362           return sequence.toString();
363         }
364 
365         @Override public String replaceFrom(CharSequence sequence, CharSequence replacement) {
366           checkNotNull(replacement);
367           return sequence.toString();
368         }
369 
370         @Override public String collapseFrom(CharSequence sequence, char replacement) {
371           return sequence.toString();
372         }
373 
374         @Override public String trimFrom(CharSequence sequence) {
375           return sequence.toString();
376         }
377 
378         @Override
379         public String trimLeadingFrom(CharSequence sequence) {
380           return sequence.toString();
381         }
382 
383         @Override
384         public String trimTrailingFrom(CharSequence sequence) {
385           return sequence.toString();
386         }
387 
388         @Override public int countIn(CharSequence sequence) {
389           checkNotNull(sequence);
390           return 0;
391         }
392 
393         @Override public CharMatcher and(CharMatcher other) {
394           checkNotNull(other);
395           return this;
396         }
397 
398         @Override public CharMatcher or(CharMatcher other) {
399           return checkNotNull(other);
400         }
401 
402         @Override public CharMatcher negate() {
403           return ANY;
404         }
405       };
406 
407   // Static factories
408 
409   /**
410    * Returns a {@code char} matcher that matches only one specified character.
411    */
412   public static CharMatcher is(final char match) {
413     String description = "CharMatcher.is('" + showCharacter(match) + "')";
414     return new FastMatcher(description) {
415       @Override public boolean matches(char c) {
416         return c == match;
417       }
418 
419       @Override public String replaceFrom(CharSequence sequence, char replacement) {
420         return sequence.toString().replace(match, replacement);
421       }
422 
423       @Override public CharMatcher and(CharMatcher other) {
424         return other.matches(match) ? this : NONE;
425       }
426 
427       @Override public CharMatcher or(CharMatcher other) {
428         return other.matches(match) ? other : super.or(other);
429       }
430 
431       @Override public CharMatcher negate() {
432         return isNot(match);
433       }
434     };
435   }
436 
437   /**
438    * Returns a {@code char} matcher that matches any character except the one specified.
439    *
440    * <p>To negate another {@code CharMatcher}, use {@link #negate()}.
441    */
442   public static CharMatcher isNot(final char match) {
443     String description = "CharMatcher.isNot('" + showCharacter(match) + "')";
444     return new FastMatcher(description) {
445       @Override public boolean matches(char c) {
446         return c != match;
447       }
448 
449       @Override public CharMatcher and(CharMatcher other) {
450         return other.matches(match) ? super.and(other) : other;
451       }
452 
453       @Override public CharMatcher or(CharMatcher other) {
454         return other.matches(match) ? ANY : this;
455       }
456 
457       @Override public CharMatcher negate() {
458         return is(match);
459       }
460     };
461   }
462 
463   /**
464    * Returns a {@code char} matcher that matches any character present in the given character
465    * sequence.
466    */
467   public static CharMatcher anyOf(final CharSequence sequence) {
468     switch (sequence.length()) {
469       case 0:
470         return NONE;
471       case 1:
472         return is(sequence.charAt(0));
473       case 2:
474         return isEither(sequence.charAt(0), sequence.charAt(1));
475       default:
476         // continue below to handle the general case
477     }
478     // TODO(user): is it potentially worth just going ahead and building a precomputed matcher?
479     final char[] chars = sequence.toString().toCharArray();
480     Arrays.sort(chars);
481     StringBuilder description = new StringBuilder("CharMatcher.anyOf(\"");
482     for (char c : chars) {
483       description.append(showCharacter(c));
484     }
485     description.append("\")");
486     return new CharMatcher(description.toString()) {
487       @Override public boolean matches(char c) {
488         return Arrays.binarySearch(chars, c) >= 0;
489       }
490     };
491   }
492 
493   private static CharMatcher isEither(
494       final char match1,
495       final char match2) {
496     String description = "CharMatcher.anyOf(\"" +
497         showCharacter(match1) + showCharacter(match2) + "\")";
498     return new FastMatcher(description) {
499       @Override public boolean matches(char c) {
500         return c == match1 || c == match2;
501       }
502     };
503   }
504 
505   /**
506    * Returns a {@code char} matcher that matches any character not present in the given character
507    * sequence.
508    */
509   public static CharMatcher noneOf(CharSequence sequence) {
510     return anyOf(sequence).negate();
511   }
512 
513   /**
514    * Returns a {@code char} matcher that matches any character in a given range (both endpoints are
515    * inclusive). For example, to match any lowercase letter of the English alphabet, use {@code
516    * CharMatcher.inRange('a', 'z')}.
517    *
518    * @throws IllegalArgumentException if {@code endInclusive < startInclusive}
519    */
520   public static CharMatcher inRange(final char startInclusive, final char endInclusive) {
521     checkArgument(endInclusive >= startInclusive);
522     String description = "CharMatcher.inRange('" +
523         showCharacter(startInclusive) + "', '" +
524         showCharacter(endInclusive) + "')";
525     return inRange(startInclusive, endInclusive, description);
526   }
527 
528   static CharMatcher inRange(final char startInclusive, final char endInclusive,
529       String description) {
530     return new FastMatcher(description) {
531       @Override public boolean matches(char c) {
532         return startInclusive <= c && c <= endInclusive;
533       }
534     };
535   }
536 
537   /**
538    * Returns a matcher with identical behavior to the given {@link Character}-based predicate, but
539    * which operates on primitive {@code char} instances instead.
540    */
541   public static CharMatcher forPredicate(final Predicate<? super Character> predicate) {
542     checkNotNull(predicate);
543     if (predicate instanceof CharMatcher) {
544       return (CharMatcher) predicate;
545     }
546     String description = "CharMatcher.forPredicate(" + predicate + ")";
547     return new CharMatcher(description) {
548       @Override public boolean matches(char c) {
549         return predicate.apply(c);
550       }
551 
552       @Override public boolean apply(Character character) {
553         return predicate.apply(checkNotNull(character));
554       }
555     };
556   }
557 
  // State

  // Textual description of this matcher; assigned exactly once, by whichever constructor runs.
  final String description;

  // Constructors

  /**
   * Creates a matcher whose {@code description} field is the given string. Used by the built-in
   * matchers in this file to supply their {@code toString()} text.
   *
   * @param description the description to store; it is stored as given, without validation
   */
  CharMatcher(String description) {
    this.description = description;
  }
569 
570   /**
571    * Constructor for use by subclasses. When subclassing, you may want to override
572    * {@code toString()} to provide a useful description.
573    */
574   protected CharMatcher() {
575     description = super.toString();
576   }
577 
578   // Abstract methods
579 
  /**
   * Determines a true or false value for the given character. This is the only abstract method of
   * the class; the other operations visible in this file are built on top of it.
   *
   * @param c the character to test
   * @return {@code true} if this matcher matches {@code c}
   */
  public abstract boolean matches(char c);
582 
583   // Non-static factories
584 
585   /**
586    * Returns a matcher that matches any character not matched by this matcher.
587    */
588   public CharMatcher negate() {
589     return new NegatedMatcher(this);
590   }
591 
592   private static class NegatedMatcher extends CharMatcher {
593     final CharMatcher original;
594 
595     NegatedMatcher(String toString, CharMatcher original) {
596       super(toString);
597       this.original = original;
598     }
599 
600     NegatedMatcher(CharMatcher original) {
601       this(original + ".negate()", original);
602     }
603 
604     @Override public boolean matches(char c) {
605       return !original.matches(c);
606     }
607 
608     @Override public boolean matchesAllOf(CharSequence sequence) {
609       return original.matchesNoneOf(sequence);
610     }
611 
612     @Override public boolean matchesNoneOf(CharSequence sequence) {
613       return original.matchesAllOf(sequence);
614     }
615 
616     @Override public int countIn(CharSequence sequence) {
617       return sequence.length() - original.countIn(sequence);
618     }
619 
620     @Override public CharMatcher negate() {
621       return original;
622     }
623 
624     @Override
625     CharMatcher withToString(String description) {
626       return new NegatedMatcher(description, original);
627     }
628   }
629 
630   /**
631    * Returns a matcher that matches any character matched by both this matcher and {@code other}.
632    */
633   public CharMatcher and(CharMatcher other) {
634     return new And(this, checkNotNull(other));
635   }
636 
637   private static class And extends CharMatcher {
638     final CharMatcher first;
639     final CharMatcher second;
640 
641     And(CharMatcher a, CharMatcher b) {
642       this(a, b, "CharMatcher.and(" + a + ", " + b + ")");
643     }
644 
645     And(CharMatcher a, CharMatcher b, String description) {
646       super(description);
647       first = checkNotNull(a);
648       second = checkNotNull(b);
649     }
650 
651     @Override
652     public boolean matches(char c) {
653       return first.matches(c) && second.matches(c);
654     }
655 
656     @Override
657     CharMatcher withToString(String description) {
658       return new And(first, second, description);
659     }
660   }
661 
662   /**
663    * Returns a matcher that matches any character matched by either this matcher or {@code other}.
664    */
665   public CharMatcher or(CharMatcher other) {
666     return new Or(this, checkNotNull(other));
667   }
668 
669   private static class Or extends CharMatcher {
670     final CharMatcher first;
671     final CharMatcher second;
672 
673     Or(CharMatcher a, CharMatcher b, String description) {
674       super(description);
675       first = checkNotNull(a);
676       second = checkNotNull(b);
677     }
678 
679     Or(CharMatcher a, CharMatcher b) {
680       this(a, b, "CharMatcher.or(" + a + ", " + b + ")");
681     }
682 
683     @Override
684     public boolean matches(char c) {
685       return first.matches(c) || second.matches(c);
686     }
687 
688     @Override
689     CharMatcher withToString(String description) {
690       return new Or(first, second, description);
691     }
692   }
693 
694   /**
695    * Returns a {@code char} matcher functionally equivalent to this one, but which may be faster to
696    * query than the original; your mileage may vary. Precomputation takes time and is likely to be
697    * worthwhile only if the precomputed matcher is queried many thousands of times.
698    *
699    * <p>This method has no effect (returns {@code this}) when called in GWT: it's unclear whether a
700    * precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a
701    * worthwhile tradeoff in a browser.
702    */
703   public CharMatcher precomputed() {
704     return Platform.precomputeCharMatcher(this);
705   }
706 
707   /**
708    * Subclasses should provide a new CharMatcher with the same characteristics as {@code this},
709    * but with their {@code toString} method overridden with the new description.
710    *
711    * <p>This is unsupported by default.
712    */
713   CharMatcher withToString(String description) {
714     throw new UnsupportedOperationException();
715   }
716 
717   private static final int DISTINCT_CHARS = Character.MAX_VALUE - Character.MIN_VALUE + 1;
718 
719   /**
720    * A matcher for which precomputation will not yield any significant benefit.
721    */
722   abstract static class FastMatcher extends CharMatcher {
723     FastMatcher() {
724       super();
725     }
726 
727     FastMatcher(String description) {
728       super(description);
729     }
730 
731     @Override
732     public final CharMatcher precomputed() {
733       return this;
734     }
735 
736     @Override
737     public CharMatcher negate() {
738       return new NegatedFastMatcher(this);
739     }
740   }
741 
742   static final class NegatedFastMatcher extends NegatedMatcher {
743     NegatedFastMatcher(CharMatcher original) {
744       super(original);
745     }
746 
747     NegatedFastMatcher(String toString, CharMatcher original) {
748       super(toString, original);
749     }
750 
751     @Override
752     public final CharMatcher precomputed() {
753       return this;
754     }
755 
756     @Override
757     CharMatcher withToString(String description) {
758       return new NegatedFastMatcher(description, original);
759     }
760   }
761 
762   // Text processing routines
763 
764   /**
765    * Returns {@code true} if a character sequence contains at least one matching character.
766    * Equivalent to {@code !matchesNoneOf(sequence)}.
767    *
768    * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
769    * character, until this returns {@code true} or the end is reached.
770    *
771    * @param sequence the character sequence to examine, possibly empty
772    * @return {@code true} if this matcher matches at least one character in the sequence
773    * @since 8.0
774    */
775   public boolean matchesAnyOf(CharSequence sequence) {
776     return !matchesNoneOf(sequence);
777   }
778 
779   /**
780    * Returns {@code true} if a character sequence contains only matching characters.
781    *
782    * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
783    * character, until this returns {@code false} or the end is reached.
784    *
785    * @param sequence the character sequence to examine, possibly empty
786    * @return {@code true} if this matcher matches every character in the sequence, including when
787    *         the sequence is empty
788    */
789   public boolean matchesAllOf(CharSequence sequence) {
790     for (int i = sequence.length() - 1; i >= 0; i--) {
791       if (!matches(sequence.charAt(i))) {
792         return false;
793       }
794     }
795     return true;
796   }
797 
798   /**
799    * Returns {@code true} if a character sequence contains no matching characters. Equivalent to
800    * {@code !matchesAnyOf(sequence)}.
801    *
802    * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
803    * character, until this returns {@code false} or the end is reached.
804    *
805    * @param sequence the character sequence to examine, possibly empty
806    * @return {@code true} if this matcher matches every character in the sequence, including when
807    *         the sequence is empty
808    */
809   public boolean matchesNoneOf(CharSequence sequence) {
810     return indexIn(sequence) == -1;
811   }
812 
813   /**
814    * Returns the index of the first matching character in a character sequence, or {@code -1} if no
815    * matching character is present.
816    *
817    * <p>The default implementation iterates over the sequence in forward order calling {@link
818    * #matches} for each character.
819    *
820    * @param sequence the character sequence to examine from the beginning
821    * @return an index, or {@code -1} if no character matches
822    */
823   public int indexIn(CharSequence sequence) {
824     int length = sequence.length();
825     for (int i = 0; i < length; i++) {
826       if (matches(sequence.charAt(i))) {
827         return i;
828       }
829     }
830     return -1;
831   }
832 
833   /**
834    * Returns the index of the first matching character in a character sequence, starting from a
835    * given position, or {@code -1} if no character matches after that position.
836    *
837    * <p>The default implementation iterates over the sequence in forward order, beginning at {@code
838    * start}, calling {@link #matches} for each character.
839    *
840    * @param sequence the character sequence to examine
841    * @param start the first index to examine; must be nonnegative and no greater than {@code
842    *        sequence.length()}
843    * @return the index of the first matching character, guaranteed to be no less than {@code start},
844    *         or {@code -1} if no character matches
845    * @throws IndexOutOfBoundsException if start is negative or greater than {@code
846    *         sequence.length()}
847    */
848   public int indexIn(CharSequence sequence, int start) {
849     int length = sequence.length();
850     Preconditions.checkPositionIndex(start, length);
851     for (int i = start; i < length; i++) {
852       if (matches(sequence.charAt(i))) {
853         return i;
854       }
855     }
856     return -1;
857   }
858 
859   /**
860    * Returns the index of the last matching character in a character sequence, or {@code -1} if no
861    * matching character is present.
862    *
863    * <p>The default implementation iterates over the sequence in reverse order calling {@link
864    * #matches} for each character.
865    *
866    * @param sequence the character sequence to examine from the end
867    * @return an index, or {@code -1} if no character matches
868    */
869   public int lastIndexIn(CharSequence sequence) {
870     for (int i = sequence.length() - 1; i >= 0; i--) {
871       if (matches(sequence.charAt(i))) {
872         return i;
873       }
874     }
875     return -1;
876   }
877 
878   /**
879    * Returns the number of matching characters found in a character sequence.
880    */
881   public int countIn(CharSequence sequence) {
882     int count = 0;
883     for (int i = 0; i < sequence.length(); i++) {
884       if (matches(sequence.charAt(i))) {
885         count++;
886       }
887     }
888     return count;
889   }
890 
891   /**
892    * Returns a string containing all non-matching characters of a character sequence, in order. For
893    * example: <pre>   {@code
894    *
895    *   CharMatcher.is('a').removeFrom("bazaar")}</pre>
896    *
897    * ... returns {@code "bzr"}.
898    */
899   @CheckReturnValue
900   public String removeFrom(CharSequence sequence) {
901     String string = sequence.toString();
902     int pos = indexIn(string);
903     if (pos == -1) {
904       return string;
905     }
906 
907     char[] chars = string.toCharArray();
908     int spread = 1;
909 
910     // This unusual loop comes from extensive benchmarking
911     OUT: while (true) {
912       pos++;
913       while (true) {
914         if (pos == chars.length) {
915           break OUT;
916         }
917         if (matches(chars[pos])) {
918           break;
919         }
920         chars[pos - spread] = chars[pos];
921         pos++;
922       }
923       spread++;
924     }
925     return new String(chars, 0, pos - spread);
926   }
927 
928   /**
929    * Returns a string containing all matching characters of a character sequence, in order. For
930    * example: <pre>   {@code
931    *
932    *   CharMatcher.is('a').retainFrom("bazaar")}</pre>
933    *
934    * ... returns {@code "aaa"}.
935    */
936   @CheckReturnValue
937   public String retainFrom(CharSequence sequence) {
938     return negate().removeFrom(sequence);
939   }
940 
941   /**
942    * Returns a string copy of the input character sequence, with each character that matches this
943    * matcher replaced by a given replacement character. For example: <pre>   {@code
944    *
945    *   CharMatcher.is('a').replaceFrom("radar", 'o')}</pre>
946    *
947    * ... returns {@code "rodor"}.
948    *
949    * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
950    * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
951    * character.
952    *
953    * @param sequence the character sequence to replace matching characters in
954    * @param replacement the character to append to the result string in place of each matching
955    *        character in {@code sequence}
956    * @return the new string
957    */
958   @CheckReturnValue
959   public String replaceFrom(CharSequence sequence, char replacement) {
960     String string = sequence.toString();
961     int pos = indexIn(string);
962     if (pos == -1) {
963       return string;
964     }
965     char[] chars = string.toCharArray();
966     chars[pos] = replacement;
967     for (int i = pos + 1; i < chars.length; i++) {
968       if (matches(chars[i])) {
969         chars[i] = replacement;
970       }
971     }
972     return new String(chars);
973   }
974 
975   /**
976    * Returns a string copy of the input character sequence, with each character that matches this
977    * matcher replaced by a given replacement sequence. For example: <pre>   {@code
978    *
979    *   CharMatcher.is('a').replaceFrom("yaha", "oo")}</pre>
980    *
981    * ... returns {@code "yoohoo"}.
982    *
983    * <p><b>Note:</b> If the replacement is a fixed string with only one character, you are better
984    * off calling {@link #replaceFrom(CharSequence, char)} directly.
985    *
986    * @param sequence the character sequence to replace matching characters in
987    * @param replacement the characters to append to the result string in place of each matching
988    *        character in {@code sequence}
989    * @return the new string
990    */
991   @CheckReturnValue
992   public String replaceFrom(CharSequence sequence, CharSequence replacement) {
993     int replacementLen = replacement.length();
994     if (replacementLen == 0) {
995       return removeFrom(sequence);
996     }
997     if (replacementLen == 1) {
998       return replaceFrom(sequence, replacement.charAt(0));
999     }
1000 
1001     String string = sequence.toString();
1002     int pos = indexIn(string);
1003     if (pos == -1) {
1004       return string;
1005     }
1006 
1007     int len = string.length();
1008     StringBuilder buf = new StringBuilder((len * 3 / 2) + 16);
1009 
1010     int oldpos = 0;
1011     do {
1012       buf.append(string, oldpos, pos);
1013       buf.append(replacement);
1014       oldpos = pos + 1;
1015       pos = indexIn(string, oldpos);
1016     } while (pos != -1);
1017 
1018     buf.append(string, oldpos, len);
1019     return buf.toString();
1020   }
1021 
1022   /**
1023    * Returns a substring of the input character sequence that omits all characters this matcher
1024    * matches from the beginning and from the end of the string. For example: <pre>   {@code
1025    *
1026    *   CharMatcher.anyOf("ab").trimFrom("abacatbab")}</pre>
1027    *
1028    * ... returns {@code "cat"}.
1029    *
1030    * <p>Note that: <pre>   {@code
1031    *
1032    *   CharMatcher.inRange('\0', ' ').trimFrom(str)}</pre>
1033    *
1034    * ... is equivalent to {@link String#trim()}.
1035    */
1036   @CheckReturnValue
1037   public String trimFrom(CharSequence sequence) {
1038     int len = sequence.length();
1039     int first;
1040     int last;
1041 
1042     for (first = 0; first < len; first++) {
1043       if (!matches(sequence.charAt(first))) {
1044         break;
1045       }
1046     }
1047     for (last = len - 1; last > first; last--) {
1048       if (!matches(sequence.charAt(last))) {
1049         break;
1050       }
1051     }
1052 
1053     return sequence.subSequence(first, last + 1).toString();
1054   }
1055 
1056   /**
1057    * Returns a substring of the input character sequence that omits all characters this matcher
1058    * matches from the beginning of the string. For example: <pre> {@code
1059    *
1060    *   CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")}</pre>
1061    *
1062    * ... returns {@code "catbab"}.
1063    */
1064   @CheckReturnValue
1065   public String trimLeadingFrom(CharSequence sequence) {
1066     int len = sequence.length();
1067     for (int first = 0; first < len; first++) {
1068       if (!matches(sequence.charAt(first))) {
1069         return sequence.subSequence(first, len).toString();
1070       }
1071     }
1072     return "";
1073   }
1074 
1075   /**
1076    * Returns a substring of the input character sequence that omits all characters this matcher
1077    * matches from the end of the string. For example: <pre> {@code
1078    *
1079    *   CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")}</pre>
1080    *
1081    * ... returns {@code "abacat"}.
1082    */
1083   @CheckReturnValue
1084   public String trimTrailingFrom(CharSequence sequence) {
1085     int len = sequence.length();
1086     for (int last = len - 1; last >= 0; last--) {
1087       if (!matches(sequence.charAt(last))) {
1088         return sequence.subSequence(0, last + 1).toString();
1089       }
1090     }
1091     return "";
1092   }
1093 
1094   /**
1095    * Returns a string copy of the input character sequence, with each group of consecutive
1096    * characters that match this matcher replaced by a single replacement character. For example:
1097    * <pre>   {@code
1098    *
1099    *   CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')}</pre>
1100    *
1101    * ... returns {@code "b-p-r"}.
1102    *
1103    * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
1104    * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
1105    * character.
1106    *
1107    * @param sequence the character sequence to replace matching groups of characters in
1108    * @param replacement the character to append to the result string in place of each group of
1109    *        matching characters in {@code sequence}
1110    * @return the new string
1111    */
1112   @CheckReturnValue
1113   public String collapseFrom(CharSequence sequence, char replacement) {
1114     // This implementation avoids unnecessary allocation.
1115     int len = sequence.length();
1116     for (int i = 0; i < len; i++) {
1117       char c = sequence.charAt(i);
1118       if (matches(c)) {
1119         if (c == replacement
1120             && (i == len - 1 || !matches(sequence.charAt(i + 1)))) {
1121           // a no-op replacement
1122           i++;
1123         } else {
1124           StringBuilder builder = new StringBuilder(len)
1125               .append(sequence.subSequence(0, i))
1126               .append(replacement);
1127           return finishCollapseFrom(sequence, i + 1, len, replacement, builder, true);
1128         }
1129       }
1130     }
1131     // no replacement needed
1132     return sequence.toString();
1133   }
1134 
1135   /**
1136    * Collapses groups of matching characters exactly as {@link #collapseFrom} does, except that
1137    * groups of matching characters at the start or end of the sequence are removed without
1138    * replacement.
1139    */
1140   @CheckReturnValue
1141   public String trimAndCollapseFrom(CharSequence sequence, char replacement) {
1142     // This implementation avoids unnecessary allocation.
1143     int len = sequence.length();
1144     int first;
1145     int last;
1146 
1147     for (first = 0; first < len && matches(sequence.charAt(first)); first++) {}
1148     for (last = len - 1; last > first && matches(sequence.charAt(last)); last--) {}
1149 
1150     return (first == 0 && last == len - 1)
1151         ? collapseFrom(sequence, replacement)
1152         : finishCollapseFrom(
1153               sequence, first, last + 1, replacement,
1154               new StringBuilder(last + 1 - first),
1155               false);
1156   }
1157 
1158   private String finishCollapseFrom(
1159       CharSequence sequence, int start, int end, char replacement,
1160       StringBuilder builder, boolean inMatchingGroup) {
1161     for (int i = start; i < end; i++) {
1162       char c = sequence.charAt(i);
1163       if (matches(c)) {
1164         if (!inMatchingGroup) {
1165           builder.append(replacement);
1166           inMatchingGroup = true;
1167         }
1168       } else {
1169         builder.append(c);
1170         inMatchingGroup = false;
1171       }
1172     }
1173     return builder.toString();
1174   }
1175 
1176   /**
1177    * @deprecated Provided only to satisfy the {@link Predicate} interface; use {@link #matches}
1178    *     instead.
1179    */
1180   @Deprecated
1181   @Override
1182   public boolean apply(Character character) {
1183     return matches(character);
1184   }
1185 
1186   /**
1187    * Returns a string representation of this {@code CharMatcher}, such as
1188    * {@code CharMatcher.or(WHITESPACE, JAVA_DIGIT)}.
1189    */
1190   @Override
1191   public String toString() {
1192     return description;
1193   }
1194 
  // Lookup table behind the WHITESPACE matcher. That matcher hashes a character c to the index
  // (WHITESPACE_MULTIPLIER * c) >>> WHITESPACE_SHIFT and reports a match only when the table
  // holds that same character at that index, so the position of every entry is significant.
  // NOTE(review): the repeated U+3000 entries look like filler for otherwise unused slots, and
  // the multiplier was presumably chosen so that no two whitespace characters share a slot --
  // confirm before editing any of these three constants.
  static final String WHITESPACE_TABLE = ""
      + "\u2002\u3000\r\u0085\u200A\u2005\u2000\u3000"
      + "\u2029\u000B\u3000\u2008\u2003\u205F\u3000\u1680"
      + "\u0009\u0020\u2006\u2001\u202F\u00A0\u000C\u2009"
      + "\u3000\u2004\u3000\u3000\u2028\n\u2007\u3000";
  static final int WHITESPACE_MULTIPLIER = 1682554634;
  // The table has 32 entries, so this is numberOfLeadingZeros(31) == 27: the unsigned shift keeps
  // the top five bits of the 32-bit product, which is always an index in [0, 32).
  static final int WHITESPACE_SHIFT = Integer.numberOfLeadingZeros(WHITESPACE_TABLE.length() - 1);
1202 
1203   /**
1204    * Determines whether a character is whitespace according to the latest Unicode standard, as
1205    * illustrated
1206    * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
1207    * This is not the same definition used by other Java APIs. (See a
1208    * <a href="http://spreadsheets.google.com/pub?key=pd8dAQyHbdewRsnE5x5GzKQ">comparison of several
1209    * definitions of "whitespace"</a>.)
1210    *
1211    * <p><b>Note:</b> as the Unicode definition evolves, we will modify this constant to keep it up
1212    * to date.
1213    */
1214   public static final CharMatcher WHITESPACE = new FastMatcher("WHITESPACE") {
1215     @Override
1216     public boolean matches(char c) {
1217       return WHITESPACE_TABLE.charAt((WHITESPACE_MULTIPLIER * c) >>> WHITESPACE_SHIFT) == c;
1218     }
1219   };
1220 }